// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*****************************************************************************/
#ifndef HARDWARE_INTRINSIC
#error Define HARDWARE_INTRINSIC before including this file
#endif
/*****************************************************************************/

// clang-format off

#ifdef FEATURE_HW_INTRINSICS
/* Note
    1) Each hardware intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic`
    2) All the overloads of an intrinsic in an ISA class share one Intrinsic ID
    3) The intrinsic that generates instructions with a fixed imm8 operand has a `ival` field with "not -1" value, e.g., Sse.CompareEqual(v1,v2) -> cmpps xmm0, xmm1, 0
    4) SIMD intrinsics have a non-zero `SIMD size` field, set according to whether they operate over `Vector128<T>` (16) or `Vector256<T>` (32)
    5) Scalar intrinsics that operate over general purpose registers (e.g., Sse41.Crc32) have a `SIMD size` of 0
    6) Each intrinsic has a `NumArg` field giving its number of parameters; intrinsics whose overloads take different numbers of parameters set this field to -1
    7) Each intrinsic has 10 `instructions` fields that list the instruction to be generated for each base type
    8) Each intrinsic has one category with type of `enum HWIntrinsicCategory`, please see the definition of HWIntrinsicCategory for details
    9) Each intrinsic has one or more flags with type of `enum HWIntrinsicFlag`
*/
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  Vector128 Intrinsics
// Every row in this section uses SIMD size 16. The ten instruction slots follow the base-type order of the
// column header: TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE.
// NOTE(review): each row gets its own Intrinsic ID; presumably the ID follows row order, so confirm against
// the consumers of this list before reordering rows.
//
// As* rows: one argument each, INS_invalid in every slot, identical category and flags.
HARDWARE_INTRINSIC(Vector128,       As,                                         16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsByte,                                     16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsDouble,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsInt16,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsInt32,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsInt64,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsSByte,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsSingle,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsUInt16,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsUInt32,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsUInt64,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsVector,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsVector2,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsVector3,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsVector4,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       AsVector128,                                16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// Create is the only row in this section with NumArg -1, i.e. it is overloaded on the number of parameters.
HARDWARE_INTRINSIC(Vector128,       Create,                                     16,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
// CreateScalarUnsafe lists INS_mov_i2xmm in the eight integer slots and INS_movss / INS_movsdsse2 for TYP_FLOAT / TYP_DOUBLE.
HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                         16,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       Dot,                                        16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
// The instruction generated for float/double depends on which ISAs are supported
HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmppd},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       get_Count,                                  16,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       get_Zero,                                   16,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       GetElement,                                 16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128,       op_Equality,                                16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128,       op_Inequality,                              16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
// ToScalar names an instruction only in the TYP_FLOAT / TYP_DOUBLE slots; the eight integer slots are INS_invalid.
HARDWARE_INTRINSIC(Vector128,       ToScalar,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// ToVector256 and ToVector256Unsafe carry the same instruction list, category and flags.
HARDWARE_INTRINSIC(Vector128,       ToVector256,                                16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       ToVector256Unsafe,                          16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128,       WithElement,                                16,             3,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  Vector256 Intrinsics
// Every row in this section uses SIMD size 32. The ten instruction slots follow the base-type order of the
// column header: TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE.
// NOTE(review): each row gets its own Intrinsic ID; presumably the ID follows row order, so confirm against
// the consumers of this list before reordering rows.
//
// As* rows: one argument each, INS_invalid in every slot, identical category and flags.
HARDWARE_INTRINSIC(Vector256,       As,                                         32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsByte,                                     32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsDouble,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsInt16,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsInt32,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsInt64,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsSByte,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsSingle,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsUInt16,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsUInt32,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsUInt64,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsVector,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       AsVector256,                                32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// The instruction generated for float/double depends on which ISAs are supported
HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmppd},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
// Create is the only row in this section with NumArg -1, i.e. it is overloaded on the number of parameters.
HARDWARE_INTRINSIC(Vector256,       Create,                                     32,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
// CreateScalarUnsafe lists INS_mov_i2xmm in the eight integer slots and INS_movss / INS_movsdsse2 for TYP_FLOAT / TYP_DOUBLE.
HARDWARE_INTRINSIC(Vector256,       CreateScalarUnsafe,                         32,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       Dot,                                        32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256,       GetElement,                                 32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
// GetLower lists INS_movdqu in the eight integer slots and INS_movups / INS_movupd for TYP_FLOAT / TYP_DOUBLE.
HARDWARE_INTRINSIC(Vector256,       GetLower,                                   32,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       op_Equality,                                32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256,       op_Inequality,                              32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
// ToScalar names an instruction only in the TYP_FLOAT / TYP_DOUBLE slots; the eight integer slots are INS_invalid.
HARDWARE_INTRINSIC(Vector256,       ToScalar,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256,       WithElement,                                32,             3,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  X86Base Intrinsics
// Scalar rows (SIMD size 0, one argument): only the TYP_INT / TYP_UINT slots name an instruction
// (INS_bsf for BitScanForward, INS_bsr for BitScanReverse); every other slot is INS_invalid.
HARDWARE_INTRINSIC(X86Base,         BitScanForward,                             0,              1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bsf,                INS_bsf,                INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base,         BitScanReverse,                             0,              1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bsr,                INS_bsr,                INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  X86Base 64-bit-only Intrinsics
// 64-bit bit-scan entries (HW_Category_Scalar): SIMD size 0 and exactly one argument.
// bsf/bsr are listed only in the TYP_LONG and TYP_ULONG slots; every other base type is INS_invalid.
HARDWARE_INTRINSIC(X86Base_X64,     BitScanForward,                             0,              1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bsf,                INS_bsf,                INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base_X64,     BitScanReverse,                             0,              1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bsr,                INS_bsr,                INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE Intrinsics
// Add and bitwise-and entries: 16-byte SIMD size, two arguments, instruction only in the TYP_FLOAT slot.
// Add and And are flagged HW_Flag_Commutative; AddScalar carries HW_Flag_CopyUpperBits instead, and AndNot has no flags.
HARDWARE_INTRINSIC(SSE,             Add,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE,             AddScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             And,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE,             AndNot,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andnps,             INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Single-precision comparisons. For each predicate below there are four rows sharing one naming pattern:
//   Compare<Pred>                -> INS_cmpps   (HW_Category_SimpleSIMD)
//   CompareScalarOrdered<Pred>   -> INS_comiss  (HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
//   CompareScalar<Pred>          -> INS_cmpss   (HW_Flag_CopyUpperBits)
//   CompareScalarUnordered<Pred> -> INS_ucomiss (HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// Every cmpps/cmpss row lists the same instruction, so the predicate itself is not encoded in this table.
// NOTE(review): presumably the predicate is supplied as the imm8 operand elsewhere in the JIT - confirm.
// HW_Flag_Commutative appears only on the Equal/NotEqual rows (but not on their cmpss forms), and
// HW_Flag_SpecialImport only on CompareScalarGreaterThan and CompareScalarGreaterThanOrEqual.
HARDWARE_INTRINSIC(SSE,             CompareEqual,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareLessThan,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedLessThan,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarLessThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedLessThan,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareLessThanOrEqual,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedLessThanOrEqual,        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarLessThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedLessThanOrEqual,      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareNotEqual,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// Negated (Not*) and Ordered/Unordered single-precision comparisons. Each predicate here has only two rows:
//   Compare<Pred>       -> INS_cmpps (HW_Category_SimpleSIMD, HW_Flag_NoFlag)
//   CompareScalar<Pred> -> INS_cmpss (HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
// No comiss/ucomiss rows are listed for these predicates.
// CompareScalarNotGreaterThan and CompareScalarNotGreaterThanOrEqual additionally carry HW_Flag_SpecialImport.
HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareNotLessThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarNotLessThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareOrdered,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarOrdered,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             CompareUnordered,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             CompareScalarUnordered,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Scalar conversion entries (HW_Category_SIMDScalar).
// ConvertToInt32 and ConvertToInt32WithTruncation take one argument, list cvtss2si/cvttss2si in the TYP_FLOAT slot
// and are flagged HW_Flag_BaseTypeFromFirstArg. ConvertScalarToVector128Single takes two arguments, lists cvtsi2ss
// in the TYP_INT slot and is flagged HW_Flag_BaseTypeFromSecondArg.
HARDWARE_INTRINSIC(SSE,             ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// Division entries: two arguments, instruction only in the TYP_FLOAT slot. Neither row is flagged HW_Flag_Commutative;
// the scalar form carries HW_Flag_CopyUpperBits.
HARDWARE_INTRINSIC(SSE,             Divide,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             DivideScalar,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Load entries (HW_Category_MemoryLoad), all flagged HW_Flag_NoRMWSemantics.
// LoadHigh and LoadLow take two arguments; LoadAlignedVector128, LoadScalarVector128 and LoadVector128 take one.
HARDWARE_INTRINSIC(SSE,             LoadAlignedVector128,                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             LoadHigh,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             LoadLow,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             LoadScalarVector128,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             LoadVector128,                              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movups,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
// Max/Min entries: two arguments, instruction only in the TYP_FLOAT slot.
// maxps/minps are not symmetric in their operands: when either input is NaN, or both inputs are zero
// (of either sign), the instruction returns the second source operand. Max and Min therefore must not be
// flagged HW_Flag_Commutative, since swapping op1/op2 would change the observable result.
// The scalar forms keep HW_Flag_CopyUpperBits and were never flagged commutative.
HARDWARE_INTRINSIC(SSE,             Max,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             MaxScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             Min,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             MinScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Move* entries; every row is flagged HW_Flag_NoContainment.
// MoveMask is the only one-argument entry here and additionally carries HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg.
HARDWARE_INTRINSIC(SSE,             MoveHighToLow,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhlps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE,             MoveLowToHigh,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlhps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE,             MoveMask,                                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE,             MoveScalar,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoContainment)
// Multiply and bitwise-or entries: two arguments, instruction only in the TYP_FLOAT slot.
// Multiply and Or are flagged HW_Flag_Commutative; MultiplyScalar carries HW_Flag_CopyUpperBits instead.
HARDWARE_INTRINSIC(SSE,             Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE,             MultiplyScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             Or,                                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_orps,               INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
// Prefetch entries: SIMD size 0, one argument, HW_Category_Special, flagged HW_Flag_NoContainment|HW_Flag_NoRMWSemantics.
// Unlike the other SSE rows, the instruction sits in the TYP_UBYTE slot rather than TYP_FLOAT.
// NOTE(review): presumably because the address argument is treated as a byte pointer - confirm against the importer.
HARDWARE_INTRINSIC(SSE,             Prefetch0,                                   0,              1,     {INS_invalid,           INS_prefetcht0,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             Prefetch1,                                   0,              1,     {INS_invalid,           INS_prefetcht1,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             Prefetch2,                                   0,              1,     {INS_invalid,           INS_prefetcht2,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             PrefetchNonTemporal,                         0,              1,     {INS_invalid,           INS_prefetchnta,        INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
// Reciprocal, reciprocal-sqrt, sqrt and Shuffle entries.
// The vector forms (Reciprocal, ReciprocalSqrt, Sqrt) take one argument and are flagged HW_Flag_NoRMWSemantics.
// The *Scalar forms use NumArg -1, which the note at the top of this file reserves for intrinsics overloaded on
// the number of parameters, and carry HW_Flag_CopyUpperBits.
// Shuffle takes three arguments and is HW_Category_IMM with HW_Flag_FullRangeIMM.
HARDWARE_INTRINSIC(SSE,             Reciprocal,                                 16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             ReciprocalScalar,                           16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             ReciprocalSqrt,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             ReciprocalSqrtScalar,                       16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             Shuffle,                                    16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_shufps,             INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE,             Sqrt,                                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             SqrtScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             Store,                                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movups,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             StoreAligned,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             StoreAlignedNonTemporal,                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movntps,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             StoreFence,                                  0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             StoreHigh,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             StoreLow,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             StoreScalar,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE,             Subtract,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             SubtractScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE,             UnpackHigh,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpckhps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             UnpackLow,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpcklps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE,             Xor,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_xorps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE 64-bit-only Intrinsics
// All three rows are HW_Category_SIMDScalar with HW_Flag_SpecialCodeGen.
// The two ConvertToInt64* rows take their base type from the first argument and name their instruction in the TYP_FLOAT slot;
// ConvertScalarToVector128Single takes it from the second argument and names its instruction in the TYP_LONG slot.
HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE_X64,         ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE2 Intrinsics
// Packed arithmetic/logical rows: integer base types use the p* instructions, TYP_DOUBLE uses the *pd form,
// and the TYP_FLOAT slot is INS_invalid in every row of this SSE2 group.
HARDWARE_INTRINSIC(SSE2,            Add,                                        16,              2,     {INS_paddb,             INS_paddb,              INS_paddw,              INS_paddw,              INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            AddSaturate,                                16,              2,     {INS_paddsb,            INS_paddusb,            INS_paddsw,             INS_paddusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            AddScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            And,                                        16,              2,     {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_invalid,            INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            AndNot,                                     16,              2,     {INS_pandn,             INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_invalid,            INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            Average,                                    16,              2,     {INS_invalid,           INS_pavgb,              INS_invalid,            INS_pavgw,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
// Comparison rows come in four flavours per predicate:
//   Compare<Pred>                 - packed (cmppd, plus pcmpeq*/pcmpgt* for the integer types that have them)
//   CompareScalarOrdered<Pred>    - comisd
//   CompareScalar<Pred>           - cmpsd
//   CompareScalarUnordered<Pred>  - ucomisd
// The comisd/ucomisd rows carry HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics; the cmpsd rows carry HW_Flag_CopyUpperBits.
HARDWARE_INTRINSIC(SSE2,            CompareEqual,                               16,              2,     {INS_pcmpeqb,           INS_pcmpeqb,            INS_pcmpeqw,            INS_pcmpeqw,            INS_pcmpeqd,            INS_pcmpeqd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareGreaterThan,                         16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// NOTE(review): CompareLessThan lists the same pcmpgt* instructions as CompareGreaterThan for the signed integer types;
// presumably the operands are swapped elsewhere for the integer forms - confirm against the importer/codegen.
HARDWARE_INTRINSIC(SSE2,            CompareLessThan,                            16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedLessThan,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// Scalar double-precision less-than compare: cmpsd in the TYP_DOUBLE slot is the only instruction for this row.
// Like every other SSE2 CompareScalar* entry, all non-double slots must stay INS_invalid; the packed integer
// pcmpgt* instructions belong only to the packed CompareLessThan row.
HARDWARE_INTRINSIC(SSE2,            CompareScalarLessThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Remaining SSE2 comparison rows, followed by the conversion and divide rows.
// Each comparison row below names a single instruction, in the TYP_DOUBLE slot.
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedLessThan,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareLessThanOrEqual,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedLessThanOrEqual,        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareScalarLessThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedLessThanOrEqual,      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// NOTE(review): in this chunk only the Ordered/Unordered NotEqual rows carry HW_Flag_MultiIns; presumably they expand to
// more than one instruction - confirm against the HWIntrinsicFlag definition.
HARDWARE_INTRINSIC(SSE2,            CompareNotEqual,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// The negated and ordered/unordered predicates have only packed (cmppd) and scalar (cmpsd) rows.
HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareNotLessThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarNotLessThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareOrdered,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarOrdered,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            CompareUnordered,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            CompareScalarUnordered,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Conversion rows. Several list instructions in more than one slot (e.g. ConvertToInt32: mov_xmm2i for TYP_INT,
// cvtsd2si for TYP_DOUBLE). Rows flagged HW_Flag_BaseTypeFromFirstArg / HW_Flag_BaseTypeFromSecondArg presumably
// pick the slot from that argument's element type rather than the return type - confirm against the flag definitions.
HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Double,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2sd,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Int32,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32WithTruncation,      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Single,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2ss,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128UInt32,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
// Double-precision divide: packed (divpd) and scalar (divsd) forms.
HARDWARE_INTRINSIC(SSE2,            Divide,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            DivideScalar,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            Extract,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_pextrw,             INS_pextrw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            Insert,                                     16,              3,     {INS_invalid,           INS_invalid,            INS_pinsrw,             INS_pinsrw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            LoadAlignedVector128,                       16,              1,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_invalid,            INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            LoadFence,                                   0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            LoadHigh,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            LoadLow,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            LoadScalarVector128,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_movd,               INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            LoadVector128,                              16,              1,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_invalid,            INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            MaskMove,                                   16,              3,     {INS_maskmovdqu,        INS_maskmovdqu,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
// Not flagged HW_Flag_Commutative: maxpd (TYP_DOUBLE slot) returns its second source operand when either input is NaN
// or when both inputs are zeros of either sign, so Max(a, b) and Max(b, a) can differ. The flag applies to the whole
// row, so the integer forms (pmaxub, pmaxsw) give it up as well.
HARDWARE_INTRINSIC(SSE2,            Max,                                        16,              2,     {INS_invalid,           INS_pmaxub,             INS_pmaxsw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// MemoryFence has SIMD size 0 and NumArg 0; it lists no instruction in any slot and is categorized HW_Category_Special.
// NOTE(review): this row sits between Max and MaxScalar, breaking the mostly alphabetical run; confirm nothing depends
// on row order before moving it.
HARDWARE_INTRINSIC(SSE2,            MemoryFence,                                 0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            MaxScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Not flagged HW_Flag_Commutative: minpd (TYP_DOUBLE slot) returns its second source operand when either input is NaN
// or when both inputs are zeros of either sign, so Min(a, b) and Min(b, a) can differ. The flag applies to the whole
// row, so the integer forms (pminub, pminsw) give it up as well.
HARDWARE_INTRINSIC(SSE2,            Min,                                        16,              2,     {INS_invalid,           INS_pminub,             INS_pminsw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            MinScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            MoveMask,                                   16,              1,     {INS_pmovmskb,          INS_pmovmskb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
// NumArg is -1: per note 6 at the top of the file, MoveScalar is overloaded on the number of parameters.
HARDWARE_INTRINSIC(SSE2,            MoveScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE2,            Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuludq,            INS_invalid,            INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            MultiplyHigh,                               16,              2,     {INS_invalid,           INS_invalid,            INS_pmulhw,             INS_pmulhuw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            MultiplyAddAdjacent,                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmaddwd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            MultiplyLow,                                16,              2,     {INS_invalid,           INS_invalid,            INS_pmullw,             INS_pmullw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            MultiplyScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            Or,                                         16,              2,     {INS_por,               INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_invalid,            INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2,            PackSignedSaturate,                         16,              2,     {INS_packsswb,          INS_invalid,            INS_packssdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            PackUnsignedSaturate,                       16,              2,     {INS_invalid,           INS_packuswb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Only the TYP_USHORT slot is populated (psadbw).
HARDWARE_INTRINSIC(SSE2,            SumAbsoluteDifferences,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_psadbw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// ShiftLeftLogical, ShiftRightArithmetic and ShiftRightLogical carry HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM in addition to
// HW_Flag_FullRangeIMM; the two *128BitLane rows carry HW_Flag_FullRangeIMM only.
HARDWARE_INTRINSIC(SSE2,            ShiftLeftLogical,                           16,              2,     {INS_invalid,           INS_invalid,            INS_psllw,              INS_psllw,              INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShiftLeftLogical128BitLane,                 16,              2,     {INS_pslldq,            INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShiftRightArithmetic,                       16,              2,     {INS_invalid,           INS_invalid,            INS_psraw,              INS_invalid,            INS_psrad,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShiftRightLogical,                          16,              2,     {INS_invalid,           INS_invalid,            INS_psrlw,              INS_psrlw,              INS_psrld,              INS_psrld,              INS_psrlq,              INS_psrlq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShiftRightLogical128BitLane,                16,              2,     {INS_psrldq,            INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
// NumArg is -1: per note 6 at the top of the file, Shuffle is overloaded on the number of parameters.
HARDWARE_INTRINSIC(SSE2,            Shuffle,                                    16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_shufpd},            HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShuffleHigh,                                16,              2,     {INS_invalid,           INS_invalid,            INS_pshufhw,            INS_pshufhw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            ShuffleLow,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_pshuflw,            INS_pshuflw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE2,            Sqrt,                                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
// NumArg is -1: per note 6 at the top of the file, SqrtScalar is overloaded on the number of parameters.
HARDWARE_INTRINSIC(SSE2,            SqrtScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtsd},            HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
// Every Store* row below is HW_Category_MemoryStore and carries HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg;
// StoreNonTemporal additionally carries HW_Flag_SpecialCodeGen.
HARDWARE_INTRINSIC(SSE2,            Store,                                      16,              2,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_invalid,            INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreAligned,                               16,              2,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_invalid,            INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreAlignedNonTemporal,                    16,              2,     {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_invalid,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreHigh,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreLow,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreNonTemporal,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movnti,             INS_movnti,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            StoreScalar,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_movd,               INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2,            Subtract,                                   16,              2,     {INS_psubb,             INS_psubb,              INS_psubw,              INS_psubw,              INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_invalid,            INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            SubtractSaturate,                           16,              2,     {INS_psubsb,            INS_psubusb,            INS_psubsw,             INS_psubusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            SubtractScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2,            UnpackHigh,                                 16,              2,     {INS_punpckhbw,         INS_punpckhbw,          INS_punpckhwd,          INS_punpckhwd,          INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_invalid,            INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            UnpackLow,                                  16,              2,     {INS_punpcklbw,         INS_punpcklbw,          INS_punpcklwd,          INS_punpcklwd,          INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_invalid,            INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2,            Xor,                                        16,              2,     {INS_pxor,              INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_invalid,            INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE2 64-bit-only Intrinsics
// Rows in this section populate only the TYP_LONG and TYP_ULONG slots, plus the TYP_DOUBLE slot for ConvertToInt64 and
// ConvertToInt64WithTruncation. All rows except ConvertToInt64, ConvertToInt64WithTruncation and ConvertToUInt64 reuse a
// function name that also appears under the SSE2 ISA.
HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128UInt64,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE2_X64,        StoreNonTemporal,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movnti,             INS_movnti,             INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE3 Intrinsics
// Each of these three rows lists one instruction for TYP_FLOAT and one for TYP_DOUBLE; no integer slot is populated.
HARDWARE_INTRINSIC(SSE3,            AddSubtract,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsubps,           INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE3,            HorizontalAdd,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_haddps,             INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE3,            HorizontalSubtract,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_hsubps,             INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Only the TYP_DOUBLE slot is populated (movddup). The integer load lddqu belongs to LoadDquVector128, not to this row.
HARDWARE_INTRINSIC(SSE3,            LoadAndDuplicateToVector128,                16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movddup},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
// LoadDquVector128 lists lddqu for all eight integer base types and nothing for TYP_FLOAT/TYP_DOUBLE.
// Each Move*AndDuplicate row lists a single instruction, in the TYP_DOUBLE or TYP_FLOAT slot.
HARDWARE_INTRINSIC(SSE3,            LoadDquVector128,                           16,              1,     {INS_lddqu,             INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE3,            MoveAndDuplicate,                           16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE3,            MoveHighAndDuplicate,                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movshdup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE3,            MoveLowAndDuplicate,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movsldup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSSE3 Intrinsics
//  All SSSE3 rows are 16-byte (Vector128) entries and populate integer columns only; the
//  TYP_FLOAT and TYP_DOUBLE columns are INS_invalid throughout this section.
//  AlignRight is the only HW_Category_IMM row here (three arguments, HW_Flag_FullRangeIMM) and
//  lists palignr for all eight integer base types.
//  NOTE(review): HorizontalAdd also fills the TYP_USHORT/TYP_UINT columns whereas
//  HorizontalSubtract fills only TYP_SHORT/TYP_INT. This asymmetry looks intentional (presumably
//  the unsigned entries serve JIT-internal users of HorizontalAdd) - confirm before "aligning" them.
HARDWARE_INTRINSIC(SSSE3,           Abs,                                        16,              1,     {INS_pabsb,             INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSSE3,           AlignRight,                                 16,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSSE3,           HorizontalAdd,                              16,              2,     {INS_invalid,           INS_invalid,            INS_phaddw,             INS_phaddw,             INS_phaddd,             INS_phaddd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           HorizontalAddSaturate,                      16,              2,     {INS_invalid,           INS_invalid,            INS_phaddsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           HorizontalSubtract,                         16,              2,     {INS_invalid,           INS_invalid,            INS_phsubw,             INS_invalid,            INS_phsubd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           HorizontalSubtractSaturate,                 16,              2,     {INS_invalid,           INS_invalid,            INS_phsubsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           MultiplyAddAdjacent,                        16,              2,     {INS_invalid,           INS_invalid,            INS_pmaddubsw,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           MultiplyHighRoundScale,                     16,              2,     {INS_invalid,           INS_invalid,            INS_pmulhrsw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           Shuffle,                                    16,              2,     {INS_pshufb,            INS_pshufb,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSSE3,           Sign,                                       16,              2,     {INS_psignb,            INS_invalid,            INS_psignw,             INS_invalid,            INS_psignd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE41 Intrinsics
//  All SSE41 rows are 16-byte (Vector128) entries.
//  Rows with NumArg -1 (the *Scalar rounding rows) are overloaded on more than one parameter count.
//  Ceiling, Floor, RoundCurrentDirection and every RoundTo* row list the same instructions
//  (roundps/roundpd for the packed rows, roundss/roundsd for the *Scalar rows); the table does not
//  carry the rounding mode, so it is presumably supplied where these rows are consumed - confirm.
HARDWARE_INTRINSIC(SSE41,           Blend,                                      16,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blendps,            INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE41,           BlendVariable,                              16,              3,     {INS_pblendvb,          INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_blendvps,           INS_blendvpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41,           Ceiling,                                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           CeilingScalar,                              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           CompareEqual,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpeqq,            INS_pcmpeqq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
//  ConvertToVector128Int16/32/64: the populated columns are the narrower source element types
//  (the rows carry HW_Flag_BaseTypeFromFirstArg); signed columns list pmovsx*, unsigned columns pmovzx*.
HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int16,                    16,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int32,                    16,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int64,                    16,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           DotProduct,                                 16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_dpps,               INS_dppd},              HW_Category_IMM,                    HW_Flag_FullRangeIMM)
//  Extract (and Insert further down) leave the TYP_LONG/TYP_ULONG columns empty here; the 64-bit
//  element forms (pextrq / pinsrq) are listed under the separate SSE41_X64 ISA.
HARDWARE_INTRINSIC(SSE41,           Extract,                                    16,              2,     {INS_pextrb,            INS_pextrb,             INS_invalid,            INS_invalid,            INS_pextrd,             INS_pextrd,             INS_invalid,            INS_invalid,            INS_extractps,          INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           Floor,                                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           FloorScalar,                                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           Insert,                                     16,              3,     {INS_pinsrb,            INS_pinsrb,             INS_invalid,            INS_invalid,            INS_pinsrd,             INS_pinsrd,             INS_invalid,            INS_invalid,            INS_insertps,           INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE41,           LoadAlignedVector128NonTemporal,            16,              1,     {INS_movntdqa,          INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           Max,                                        16,              2,     {INS_pmaxsb,            INS_invalid,            INS_invalid,            INS_pmaxuw,             INS_pmaxsd,             INS_pmaxud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41,           Min,                                        16,              2,     {INS_pminsb,            INS_invalid,            INS_invalid,            INS_pminuw,             INS_pminsd,             INS_pminud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41,           MinHorizontal,                              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_phminposuw,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           MultipleSumAbsoluteDifferences,             16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_mpsadbw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE41,           Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE41,           MultiplyLow,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmulld,             INS_pmulld,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41,           PackUnsignedSaturate,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_packusdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE41,           RoundCurrentDirection,                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           RoundCurrentDirectionScalar,                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           RoundToNearestInteger,                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           RoundToNearestIntegerScalar,                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           RoundToNegativeInfinity,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           RoundToNegativeInfinityScalar,              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           RoundToPositiveInfinity,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           RoundToPositiveInfinityScalar,              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41,           RoundToZero,                                16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41,           RoundToZeroScalar,                          16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
//  TestC, TestNotZAndNotC and TestZ all list ptest for the eight integer base types; what tells
//  them apart is not expressed in this table (presumably which flag is read afterwards - confirm).
HARDWARE_INTRINSIC(SSE41,           TestC,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE41,           TestNotZAndNotC,                            16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(SSE41,           TestZ,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE41 64-bit-only Intrinsics
//  These two rows populate only the TYP_LONG/TYP_ULONG columns (pextrq / pinsrq), i.e. exactly the
//  columns that the SSE41 Extract and Insert rows leave as INS_invalid. Category and flags match
//  the corresponding SSE41 rows.
HARDWARE_INTRINSIC(SSE41_X64,       Extract,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pextrq,             INS_pextrq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_X64,       Insert,                                     16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pinsrq,             INS_pinsrq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE42 Intrinsics
//  Crc32 has SIMD size 0 and HW_Category_Scalar: it operates over general purpose registers rather
//  than vectors. Only the TYP_UBYTE, TYP_USHORT and TYP_UINT columns are populated here; the
//  TYP_ULONG form is listed under the separate SSE42_X64 ISA.
//  CompareGreaterThan and CompareLessThan both list INS_pcmpgtq (TYP_LONG only).
//  NOTE(review): CompareLessThan presumably gets its result by swapping operands where the row is
//  consumed, since the table lists the same "greater than" instruction - confirm.
HARDWARE_INTRINSIC(SSE42,           Crc32,                                       0,              2,     {INS_invalid,           INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
HARDWARE_INTRINSIC(SSE42,           CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE42,           CompareLessThan,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  SSE42 64-bit-only Intrinsics
//  Populates only the TYP_ULONG column, which the SSE42 Crc32 row leaves as INS_invalid. SIMD size,
//  NumArg, category and flag are the same as on the SSE42 Crc32 row.
HARDWARE_INTRINSIC(SSE42_X64,       Crc32,                                       0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_crc32,              INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  AVX Intrinsics
// Floating-point-only rows: instructions appear only in the TYP_FLOAT / TYP_DOUBLE slots.
// BroadcastScalarToVector128 is the one 16-byte entry in this group and has no TYP_DOUBLE instruction.
HARDWARE_INTRINSIC(AVX,             Add,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addps,              INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             AddSubtract,                                32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsubps,           INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             And,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andps,              INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             AndNot,                                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andnps,             INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             Blend,                                      32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blendps,            INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             BlendVariable,                              32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vblendvps,          INS_vblendvpd},         HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             Ceiling,                                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector128,                 16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastss,       INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector256,                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_MemoryLoad,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             BroadcastVector128ToVector256,              32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastf128,     INS_vbroadcastf128},    HW_Category_MemoryLoad,             HW_Flag_NoFlag)
// Compare family: every row lists cmpps/cmppd (cmpss/cmpsd for CompareScalar). Compare and CompareScalar are the
// three-argument HW_Category_IMM forms; the named two-argument forms are HW_Category_SimpleSIMD, and of those only
// CompareEqual and CompareNotEqual are flagged HW_Flag_Commutative.
// NOTE(review): how the named forms pick their comparison predicate is not expressed in this table - presumably
// handled where the table is consumed; confirm before changing any of these rows.
HARDWARE_INTRINSIC(AVX,             Compare,                                    32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_IMM,                    HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareEqual,                               32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             CompareGreaterThan,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareGreaterThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareLessThan,                            32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareLessThanOrEqual,                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareNotEqual,                            32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             CompareNotGreaterThan,                      32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareNotGreaterThanOrEqual,               32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareNotLessThan,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareNotLessThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareOrdered,                             32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareUnordered,                           32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             CompareScalar,                              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits)
// Conversions: the populated slot is TYP_INT for the *Int32* rows and TYP_FLOAT for the *Single rows.
// ConvertToVector256Double is the only row with two populated slots (TYP_INT and TYP_FLOAT) and the only one that
// carries HW_Flag_BaseTypeFromFirstArg.
HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector128Single,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector256Single,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector256Double,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32WithTruncation,      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttpd2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32WithTruncation,      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
// ExtractVector128, InsertVector128 and the Load* rows populate the integer slots as well as TYP_FLOAT / TYP_DOUBLE.
// LoadAlignedVector256 and LoadVector256 list movdqa / movdqu for integer base types and the ps/pd moves for
// floating point; LoadDquVector256 has no floating-point instruction.
HARDWARE_INTRINSIC(AVX,             Divide,                                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             DotProduct,                                 32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_dpps,               INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             DuplicateEvenIndexed,                       32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movsldup,           INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             DuplicateOddIndexed,                        32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movshdup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ExtractVector128,                           32,              2,     {INS_vextractf128,      INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128},      HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             Floor,                                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             HorizontalAdd,                              32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_haddps,             INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             HorizontalSubtract,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_hsubps,             INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             InsertVector128,                            32,              3,     {INS_vinsertf128,       INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128},       HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             LoadAlignedVector256,                       32,              1,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movaps,             INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             LoadDquVector256,                           32,              1,     {INS_lddqu,             INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             LoadVector256,                              32,              1,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
// Max/Min are deliberately NOT flagged HW_Flag_Commutative: maxps/maxpd/minps/minpd return the second source operand
// when either input is NaN or when both inputs are zero (of either sign), so swapping the operands can change the result.
HARDWARE_INTRINSIC(AVX,             Max,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxps,              INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             Min,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minps,              INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// MaskLoad, MaskStore, Permute and PermuteVar use -1 in the SIMD size column.
// NOTE(review): the file header documents -1 only for NumArg; presumably -1 here marks rows that apply to more
// than one vector size - confirm against the code that consumes this table.
// Permute2x128 populates every slot with vperm2f128; the other rows are TYP_FLOAT / TYP_DOUBLE only.
HARDWARE_INTRINSIC(AVX,             MaskLoad,                                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryLoad,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             MaskStore,                                  -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX,             MoveMask,                                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskps,           INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX,             Multiply,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulps,              INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             Or,                                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_orps,               INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX,             Permute,                                    -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilps,          INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             Permute2x128,                               32,              3,     {INS_vperm2f128,        INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128},        HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             PermuteVar,                                 -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilpsvar,       INS_vpermilpdvar},      HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Every row in this group carries HW_Flag_NoRMWSemantics. Reciprocal and ReciprocalSqrt have no TYP_DOUBLE instruction.
// All five Round* rows list the same roundps/roundpd pair (as do Ceiling and Floor elsewhere in the AVX section).
// NOTE(review): the rounding mode for each named form is not expressed in this table - presumably supplied where the
// table is consumed; confirm before editing.
HARDWARE_INTRINSIC(AVX,             Reciprocal,                                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             ReciprocalSqrt,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             RoundCurrentDirection,                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             RoundToNearestInteger,                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             RoundToNegativeInfinity,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             RoundToPositiveInfinity,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             RoundToZero,                                32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX,             Shuffle,                                    32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_shufps,             INS_shufpd},            HW_Category_IMM,                    HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX,             Sqrt,                                       32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
// Store rows: all three are HW_Category_MemoryStore with HW_Flag_BaseTypeFromSecondArg. Integer slots list
// movdqu / movdqa / movntdq; the TYP_FLOAT / TYP_DOUBLE slots list the corresponding ps / pd instructions.
HARDWARE_INTRINSIC(AVX,             Store,                                      32,              2,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX,             StoreAligned,                               32,              2,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movaps,             INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX,             StoreAlignedNonTemporal,                    32,              2,     {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntps,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
// TestC / TestNotZAndNotC / TestZ share one instruction set: ptest for every integer base type and
// vtestps / vtestpd for TYP_FLOAT / TYP_DOUBLE. They use -1 in the SIMD size column and take the base type from
// the first argument. The remaining rows are TYP_FLOAT / TYP_DOUBLE only.
HARDWARE_INTRINSIC(AVX,             Subtract,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subps,              INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             TestC,                                      -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX,             TestNotZAndNotC,                            -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX,             TestZ,                                      -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX,             UnpackHigh,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpckhps,           INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             UnpackLow,                                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpcklps,           INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX,             Xor,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_xorps,              INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  AVX2 Intrinsics
// Abs .. BlendVariable: integer arithmetic, bitwise and blend rows. None of them populates the TYP_FLOAT/TYP_DOUBLE slots.
//  - Abs fills only the signed 8/16/32-bit slots; Average fills only the unsigned 8/16-bit slots.
//  - AlignRight and Blend are HW_Category_IMM with HW_Flag_FullRangeIMM; the remaining rows are HW_Category_SimpleSIMD.
//  - Blend uses SIMD size -1. NOTE(review): the note at the top of the file only documents sizes 16 and 32; presumably -1 means
//    the size is not fixed because the intrinsic has both Vector128 and Vector256 overloads - confirm against the importer.
HARDWARE_INTRINSIC(AVX2,            Abs,                                        32,              1,     {INS_pabsb,             INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2,            Add,                                        32,              2,     {INS_paddb,             INS_paddb,              INS_paddw,              INS_paddw,              INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            AddSaturate,                                32,              2,     {INS_paddsb,            INS_paddusb,            INS_paddsw,             INS_paddusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            AlignRight,                                 32,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            And,                                        32,              2,     {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            AndNot,                                     32,              2,     {INS_pandn,             INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            Average,                                    32,              2,     {INS_invalid,           INS_pavgb,              INS_invalid,            INS_pavgw,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            Blend,                                      -1,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_vpblendd,           INS_vpblendd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            BlendVariable,                              32,              3,     {INS_vpblendvb,         INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Broadcast rows. The two scalar broadcasts are HW_Category_SIMDScalar, flagged HW_Flag_MaybeMemoryLoad, and populate all ten
// slots; they differ only in SIMD size (16 vs 32) and in the TYP_DOUBLE slot (movddup vs vbroadcastsd).
// BroadcastVector128ToVector256 is HW_Category_MemoryLoad and has no TYP_FLOAT/TYP_DOUBLE entry.
HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector128,                 16,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector256,                 32,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX2,            BroadcastVector128ToVector256,              32,              1,     {INS_vbroadcasti128,    INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
// Integer comparisons. CompareEqual is commutative and fills every integer slot; CompareGreaterThan and CompareLessThan fill
// only the signed slots (TYP_BYTE, TYP_SHORT, TYP_INT, TYP_LONG).
// NOTE(review): CompareLessThan lists exactly the same pcmpgt* instructions, category and flags as CompareGreaterThan.
// Presumably the operands are swapped elsewhere in the JIT before code generation - this table alone does not show it; confirm.
HARDWARE_INTRINSIC(AVX2,            CompareEqual,                               32,              2,     {INS_pcmpeqb,           INS_pcmpeqb,            INS_pcmpeqw,            INS_pcmpeqw,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqq,            INS_pcmpeqq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            CompareGreaterThan,                         32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            CompareLessThan,                            32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// ExtractVector128 (listed here, ahead of the ConvertTo* rows, rather than in alphabetical position) takes an immediate and
// uses vextracti128 for every integer slot.
// ConvertToInt32 / ConvertToUInt32 populate a single slot each (TYP_INT / TYP_UINT) with mov_xmm2i and need special codegen.
// ConvertToVector256Int16/32/64 carry HW_Flag_BaseTypeFromFirstArg: the signed slots hold pmovsx* and the unsigned slots hold
// pmovzx*, and the set of populated slots grows with the destination width (8-bit; 8/16-bit; 8/16/32-bit).
HARDWARE_INTRINSIC(AVX2,            ExtractVector128,                           32,              2,     {INS_vextracti128,      INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ConvertToInt32,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX2,            ConvertToUInt32,                            32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int16,                    32,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int32,                    32,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int64,                    32,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
// Gather rows: GatherVector128/256 take 3 operands, GatherMaskVector128/256 take 5 and additionally need special import.
// All four share the same instruction list (32/64-bit integer, float and double slots only) and are flagged
// HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment.
// NOTE(review): only the vpgatherd*/vgatherd* mnemonics are listed; presumably the special codegen picks other gather forms
// where needed - confirm in the code generator.
HARDWARE_INTRINSIC(AVX2,            GatherVector128,                            16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX2,            GatherVector256,                            32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX2,            GatherMaskVector128,                        16,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX2,            GatherMaskVector256,                        32,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
// Horizontal add/subtract rows; none is flagged commutative. The saturating forms populate only the TYP_SHORT slot.
// NOTE(review): HorizontalAdd fills both the signed and unsigned 16/32-bit slots, whereas HorizontalSubtract fills only the
// signed ones. Presumably the unsigned HorizontalAdd entries serve callers inside the JIT - confirm the asymmetry is intended.
HARDWARE_INTRINSIC(AVX2,            HorizontalAdd,                              32,              2,     {INS_invalid,           INS_invalid,            INS_phaddw,             INS_phaddw,             INS_phaddd,             INS_phaddd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            HorizontalAddSaturate,                      32,              2,     {INS_invalid,           INS_invalid,            INS_phaddsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            HorizontalSubtract,                         32,              2,     {INS_invalid,           INS_invalid,            INS_phsubw,             INS_invalid,            INS_phsubd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            HorizontalSubtractSaturate,                 32,              2,     {INS_invalid,           INS_invalid,            INS_phsubsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// InsertVector128 takes an immediate (3 operands); LoadAlignedVector256NonTemporal is a pure memory load using movntdqa.
// MaskLoad / MaskStore use SIMD size -1 and populate only the 32/64-bit integer slots (vpmaskmovd / vpmaskmovq).
// MaskStore is HW_Category_MemoryStore and takes its base type from the second argument (HW_Flag_BaseTypeFromSecondArg).
// NOTE(review): SIMD size -1 is not described in the note at the top of the file; presumably it marks intrinsics overloaded
// on both Vector128 and Vector256 - confirm.
HARDWARE_INTRINSIC(AVX2,            InsertVector128,                            32,              3,     {INS_vinserti128,       INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            LoadAlignedVector256NonTemporal,            32,              1,     {INS_movntdqa,          INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            MaskLoad,                                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            MaskStore,                                  -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg)
// Max / Min: commutative, signed and unsigned 8/16/32-bit slots only (no 64-bit entries).
// MoveMask: pmovmskb in the two 8-bit slots; base type comes from the first argument and containment is disabled.
// Multiply family:
//  - Multiply populates only the TYP_LONG / TYP_ULONG slots (pmuldq / pmuludq).
//  - MultipleSumAbsoluteDifferences takes an immediate (3 operands) and populates only the TYP_USHORT slot.
//  - MultiplyAddAdjacent holds pmaddubsw in the TYP_SHORT slot and pmaddwd in the TYP_INT slot; it is not flagged commutative.
//  - MultiplyHigh covers the signed/unsigned 16-bit slots, MultiplyHighRoundScale only TYP_SHORT, MultiplyLow 16- and 32-bit.
HARDWARE_INTRINSIC(AVX2,            Max,                                        32,              2,     {INS_pmaxsb,            INS_pmaxub,             INS_pmaxsw,             INS_pmaxuw,             INS_pmaxsd,             INS_pmaxud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            Min,                                        32,              2,     {INS_pminsb,            INS_pminub,             INS_pminsw,             INS_pminuw,             INS_pminsd,             INS_pminud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            MoveMask,                                   32,              1,     {INS_pmovmskb,          INS_pmovmskb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2,            Multiply,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_pmuludq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            MultipleSumAbsoluteDifferences,             32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_mpsadbw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            MultiplyAddAdjacent,                        32,              2,     {INS_invalid,           INS_invalid,            INS_pmaddubsw,          INS_invalid,            INS_pmaddwd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            MultiplyHigh,                               32,              2,     {INS_invalid,           INS_invalid,            INS_pmulhw,             INS_pmulhuw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            MultiplyHighRoundScale,                     32,              2,     {INS_invalid,           INS_invalid,            INS_pmulhrsw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            MultiplyLow,                                32,              2,     {INS_invalid,           INS_invalid,            INS_pmullw,             INS_pmullw,             INS_pmulld,             INS_pmulld,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
// Or: commutative, all integer slots.
// Permute2x128 (3 operands) and Permute4x64 (2 operands) take an immediate; Permute4x64 also populates TYP_DOUBLE (vpermpd).
// PermuteVar8x32 is HW_Category_SimpleSIMD but needs special import; it populates the 32-bit integer slots and TYP_FLOAT.
// PackSignedSaturate fills the TYP_BYTE / TYP_SHORT slots, PackUnsignedSaturate the TYP_UBYTE / TYP_USHORT slots.
// NOTE(review): for the Pack* rows the populated slot presumably corresponds to the result element type - confirm.
HARDWARE_INTRINSIC(AVX2,            Or,                                         32,              2,     {INS_por,               INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2,            Permute2x128,                               32,              3,     {INS_vperm2i128,        INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            Permute4x64,                                32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermq,             INS_vpermq,             INS_invalid,            INS_vpermpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            PermuteVar8x32,                             32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermd,             INS_vpermd,             INS_invalid,            INS_invalid,            INS_vpermps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AVX2,            PackSignedSaturate,                         32,              2,     {INS_packsswb,          INS_invalid,            INS_packssdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            PackUnsignedSaturate,                       32,              2,     {INS_invalid,           INS_packuswb,           INS_invalid,            INS_packusdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Shift rows come in three flavours:
//  - ShiftLeftLogical / ShiftRightArithmetic / ShiftRightLogical: HW_Category_IMM with
//    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM; no 8-bit slots, and the arithmetic form has no 64-bit slots.
//  - *128BitLane: HW_Category_IMM with HW_Flag_FullRangeIMM only; pslldq / psrldq for every integer slot.
//  - *Variable: HW_Category_SimpleSIMD with SIMD size -1; 32/64-bit slots (32-bit only for the arithmetic form).
// NOTE(review): ShiftRightArithmeticVariable populates TYP_UINT as well as TYP_INT, unlike ShiftRightArithmetic which is
// signed-only - confirm this is intended. SIMD size -1 presumably marks Vector128/Vector256 overloads - confirm.
HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical,                           32,              2,     {INS_invalid,           INS_invalid,            INS_psllw,              INS_psllw,              INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical128BitLane,                 32,              2,     {INS_pslldq,            INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogicalVariable,                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsllvd,            INS_vpsllvd,            INS_vpsllvq,            INS_vpsllvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmetic,                       32,              2,     {INS_invalid,           INS_invalid,            INS_psraw,              INS_invalid,            INS_psrad,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmeticVariable,               -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsravd,            INS_vpsravd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical,                          32,              2,     {INS_invalid,           INS_invalid,            INS_psrlw,              INS_psrlw,              INS_psrld,              INS_psrld,              INS_psrlq,              INS_psrlq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical128BitLane,                32,              2,     {INS_psrldq,            INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShiftRightLogicalVariable,                  -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsrlvd,            INS_vpsrlvd,            INS_vpsrlvq,            INS_vpsrlvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
// Shuffle rows, all HW_Category_IMM. Shuffle holds pshufb in the 8-bit slots and pshufd in the 32-bit slots and is the only
// one also flagged HW_Flag_MaybeIMM. ShuffleHigh / ShuffleLow populate only the 16-bit slots (pshufhw / pshuflw).
// NOTE(review): HW_Flag_MaybeIMM on Shuffle is presumably because the pshufb overloads take a vector control operand rather
// than an immediate - confirm.
HARDWARE_INTRINSIC(AVX2,            Shuffle,                                    32,              2,     {INS_pshufb,            INS_pshufb,             INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM)
HARDWARE_INTRINSIC(AVX2,            ShuffleHigh,                                32,              2,     {INS_invalid,           INS_invalid,            INS_pshufhw,            INS_pshufhw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2,            ShuffleLow,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_pshuflw,            INS_pshuflw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
// Sign .. Xor: all HW_Category_SimpleSIMD, two operands, SIMD size 32, no TYP_FLOAT/TYP_DOUBLE entries.
//  - Sign fills only the signed 8/16/32-bit slots; SumAbsoluteDifferences fills only TYP_USHORT (psadbw).
//  - Subtract, SubtractSaturate and the Unpack rows are order-sensitive (HW_Flag_NoFlag); only Xor is flagged commutative.
HARDWARE_INTRINSIC(AVX2,            Sign,                                       32,              2,     {INS_psignb,            INS_invalid,            INS_psignw,             INS_invalid,            INS_psignd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            SumAbsoluteDifferences,                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_psadbw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            Subtract,                                   32,              2,     {INS_psubb,             INS_psubb,              INS_psubw,              INS_psubw,              INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            SubtractSaturate,                           32,              2,     {INS_psubsb,            INS_psubusb,            INS_psubsw,             INS_psubusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            UnpackHigh,                                 32,              2,     {INS_punpckhbw,         INS_punpckhbw,          INS_punpckhwd,          INS_punpckhwd,          INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            UnpackLow,                                  32,              2,     {INS_punpcklbw,         INS_punpcklbw,          INS_punpcklwd,          INS_punpcklwd,          INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2,            Xor,                                        32,              2,     {INS_pxor,              INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  AES Intrinsics
// Every AES row is 128-bit (SIMD size 16) and populates only the TYP_UBYTE slot. InverseMixColumns takes one operand, the
// rest take two. KeygenAssist is the only immediate-taking row (HW_Category_IMM, HW_Flag_FullRangeIMM); the others are
// HW_Category_SimpleSIMD with no flags, i.e. none is marked commutative.
HARDWARE_INTRINSIC(AES,             Decrypt,                                    16,              2,     {INS_invalid,           INS_aesdec,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AES,             DecryptLast,                                16,              2,     {INS_invalid,           INS_aesdeclast,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AES,             Encrypt,                                    16,              2,     {INS_invalid,           INS_aesenc,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AES,             EncryptLast,                                16,              2,     {INS_invalid,           INS_aesenclast,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AES,             InverseMixColumns,                          16,              1,     {INS_invalid,           INS_aesimc,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AES,             KeygenAssist,                               16,              2,     {INS_invalid,           INS_aeskeygenassist,    INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  BMI1 Intrinsics
//  Scalar intrinsics: SIMD size 0 marks them as operating over general purpose registers (note 5 at the top of the file).
//  Every row lists its instruction in the TYP_INT column only; the TYP_LONG forms are separate rows under BMI1_X64.
HARDWARE_INTRINSIC(BMI1,            AndNot,                                      0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_andn,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1,            ExtractLowestSetBit,                         0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsi,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1,            GetMaskUpToLowestSetBit,                     0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsmsk,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1,            ResetLowestSetBit,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsr,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1,            TrailingZeroCount,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_tzcnt,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
HARDWARE_INTRINSIC(BMI1,            BitFieldExtract,                             0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bextr,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  BMI1 X64 Intrinsics
//  X64 counterparts of the BMI1 rows: same function names, SIMD size, NumArg, category and flags,
//  but each instruction is listed in the TYP_LONG column instead of TYP_INT.
HARDWARE_INTRINSIC(BMI1_X64,        AndNot,                                      0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andn,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_X64,        ExtractLowestSetBit,                         0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsi,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_X64,        GetMaskUpToLowestSetBit,                     0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsmsk,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_X64,        ResetLowestSetBit,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsr,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_X64,        TrailingZeroCount,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_tzcnt,              INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
HARDWARE_INTRINSIC(BMI1_X64,        BitFieldExtract,                             0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bextr,              INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  BMI2 Intrinsics
//  Scalar intrinsics (SIMD size 0) with the instruction listed in the TYP_INT column only; the TYP_LONG forms
//  are separate rows under BMI2_X64.
//  MultiplyNoFlags has NumArg -1 because it is overloaded on the number of parameters (note 6 at the top of the file).
HARDWARE_INTRINSIC(BMI2,            ParallelBitDeposit,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pdep,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2,            ParallelBitExtract,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pext,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2,            ZeroHighBits,                                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bzhi,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2,            MultiplyNoFlags,                             0,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mulx,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  BMI2 X64 Intrinsics
//  X64 counterparts of the BMI2 rows: same function names, SIMD size, NumArg, category and flags,
//  but each instruction is listed in the TYP_LONG column instead of TYP_INT.
HARDWARE_INTRINSIC(BMI2_X64,        ParallelBitDeposit,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pdep,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64,        ParallelBitExtract,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pext,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64,        ZeroHighBits,                                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bzhi,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2_X64,        MultiplyNoFlags,                             0,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulx,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  FMA Intrinsics
//  Every row takes 3 arguments, is defined only for TYP_FLOAT and TYP_DOUBLE, uses the "213" form of its
//  instruction, and carries HW_Flag_SpecialCodeGen.
//  The *Scalar rows are HW_Category_SIMDScalar with SIMD size 16 and add HW_Flag_CopyUpperBits; the remaining
//  rows are HW_Category_SimpleSIMD with SIMD size -1.
//  NOTE(review): SIMD size -1 is not described in the notes at the top of the file; presumably the size is taken
//  from the overload (Vector128 vs Vector256) -- confirm.
HARDWARE_INTRINSIC(FMA,             MultiplyAdd,                                -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplyAddNegated,                         -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplyAddNegatedScalar,                   16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ss,       INS_vfnmadd213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(FMA,             MultiplyAddScalar,                          16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ss,        INS_vfmadd213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(FMA,             MultiplyAddSubtract,                        -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplySubtract,                           -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplySubtractAdd,                        -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegated,                    -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(FMA,             MultiplySubtractScalar,                     16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ss,        INS_vfmsub213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegatedScalar,              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ss,       INS_vfnmsub213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  LZCNT Intrinsics
//  Single scalar intrinsic (SIMD size 0, one argument); lzcnt is listed in the TYP_UINT column only.
//  The TYP_ULONG form is a separate row under LZCNT_X64.
HARDWARE_INTRINSIC(LZCNT,           LeadingZeroCount,                            0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_lzcnt,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  LZCNT X64 Intrinsics
//  X64 counterpart of LZCNT LeadingZeroCount: identical shape and flags, with lzcnt listed in the TYP_ULONG
//  column instead of TYP_UINT.
HARDWARE_INTRINSIC(LZCNT_X64,       LeadingZeroCount,                            0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_lzcnt,              INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  PCLMULQDQ Intrinsics
//  One 16-byte (Vector128) intrinsic with 3 arguments; pclmulqdq is listed for both TYP_LONG and TYP_ULONG.
//  NOTE(review): HW_Category_IMM suggests the third argument is the imm8 operand, and HW_Flag_FullRangeIMM that
//  all imm8 values are accepted -- confirm against the HWIntrinsicCategory/HWIntrinsicFlag definitions.
HARDWARE_INTRINSIC(PCLMULQDQ,       CarrylessMultiply,                          16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pclmulqdq,          INS_pclmulqdq,          INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  POPCNT Intrinsics
//  Single scalar intrinsic (SIMD size 0, one argument); popcnt is listed in the TYP_UINT column only.
//  The TYP_ULONG form is a separate row under POPCNT_X64.
HARDWARE_INTRINSIC(POPCNT,          PopCount,                                    0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_popcnt,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//  POPCNT X64 Intrinsics
//  X64 counterpart of POPCNT PopCount: identical shape and flags, with popcnt listed in the TYP_ULONG
//  column instead of TYP_UINT.
HARDWARE_INTRINSIC(POPCNT_X64,      PopCount,                                    0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_popcnt,             INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Special intrinsics that are generated during lowering
// These entries are named after the instruction they emit rather than after a managed API method.
// All of them take two arguments and are flagged HW_Flag_NoRMWSemantics.
//
// Scalar floating-point compares: COMISS/UCOMISS populate only the TYP_FLOAT slot.
HARDWARE_INTRINSIC(SSE,             COMISS,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE,             UCOMISS,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
// COMISD/UCOMISD are the double-precision counterparts and populate only the TYP_DOUBLE slot.
HARDWARE_INTRINSIC(SSE2,            COMISD,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2,            UCOMISD,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
// SSE41 PTEST: INS_ptest for all eight integer base types; the floating-point slots are INS_invalid.
HARDWARE_INTRINSIC(SSE41,           PTEST,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
// AVX PTEST: same integer mapping as the SSE41 entry, plus INS_vtestps (TYP_FLOAT) and INS_vtestpd (TYP_DOUBLE).
// NOTE(review): the SIMD size column is 0 here, unlike the 16 used by the SSE41 entry; presumably this entry is not tied to a single vector width - confirm against the lowering/codegen code before changing it.
HARDWARE_INTRINSIC(AVX,             PTEST,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)

#endif // FEATURE_HW_INTRINSICS

// The including file must define HARDWARE_INTRINSIC before including this list (enforced by the #error at the top);
// undefine it here so the definition does not outlive this inclusion.
#undef HARDWARE_INTRINSIC

// clang-format on
